** Session setup: close any open log, empty memory, disable paging, fix the seed
capture log close
clear all
set more off, permanently
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.7: Priority and P1 Subject-Specific Performance - Did not pass P1 - Quintiles ////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

* Start from an empty set of stored estimation results
estimates clear

***************************************************************************
************************ Main estimate p1 survivors ***********************
***************************************************************************

** Opening Phase 2 norm_p1scores dataset
use "Work Data/Gender_Phase2_long.dta", clear

*** Creating variables
* Numeric version of the subject string, plus one 0/1 indicator per subject value
encode subject, gen(sub)
tab subject, gen(d_sub)
label var sub "Subject"

** Subject dummies
* NOTE(review): tabulate numbers the indicators following the sorted order of the
* subject codes; the mapping below assumes that order is Biology, Chemistry,
* Geography, History, Language, Mathematics, Physics, Portuguese - confirm against the data.
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Language
rename d_sub6 Mathematics
rename d_sub7 Physics
rename d_sub8 Portuguese
* Labels (full variable names are used so this does not depend on name abbreviation)
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"
label var Language "Foreign Language"

** Interaction: priority X female
gen fem_priority=female*priority
label var fem_priority "Female $\times$ Future priority"

** Interaction: priority X subject
* For every subject dummy: female x subject, priority x subject and the triple interaction
foreach v of varlist Biology-Portuguese {
    gen fem_`v'=`v'*female
    label var fem_`v' "Female $\times$ `v'"
    gen prio_`v'=priority*`v'
    label var prio_`v' "Future priority $\times$ `v'"
    gen fem_prio_`v'=fem_priority*`v'
    label var fem_prio_`v' "Female $\times$ Future priority $\times$ `v'"
}

* Regressor lists used by the estimation commands; Biology is left out of both lists
global subject "Chemistry Geography History Mathematics Physics"
global subject_fem "fem_Chemistry fem_Geography fem_History fem_Mathematics fem_Physics"

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

******************************** ENEM ********************************

* ENEM score expressed as a deviation from the mean of the same year and gender
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
generate norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g
drop norm_enem_w_ave_g

* Interaction: subject X ENEM
* Each subject dummy gets its ENEM interaction, the female version of it,
* and powers 2 to 4 of both.
foreach subj of varlist Biology-Portuguese {
    generate enem_`subj' = `subj'*norm_enem_w_g
    label variable enem_`subj' "ENEM scores $\times$ `subj'"
    generate fem_enem_`subj' = female*norm_enem_w_g*`subj'
    label variable fem_enem_`subj' "Female $\times$ ENEM scores $\times$ `subj'"
    forvalues p = 2/4 {
        generate enem_`subj'_`p' = enem_`subj'^`p'
        generate fem_enem_`subj'_`p' = fem_enem_`subj'^`p'
    }
    summarize enem_`subj'* fem_enem_`subj'*
}

* Wildcard list of the subject-by-ENEM polynomial terms (Biology is not listed)
global g_pol_enem_sub "enem_Chemistry* enem_Geography* enem_History* enem_Mathematics* enem_Physics*"
describe $g_pol_enem_sub

* Priority x relative performance in ENEM:
* linear term first, then powers 2 to 4 of the relative score times priority
generate norm_enem_w_priority_g = norm_enem_w_g*priority
forvalues p = 2/4 {
    generate norm_enem_w_priority_g`p' = norm_enem_w_g^`p'*priority
    summarize norm_enem_w_priority_g`p'
}

global g_norm_enem_w_prio norm_enem_w_priority_g*
describe $g_norm_enem_w_prio

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Only years before the affirmative action took place (year 2000 is dropped as well)
drop if aa_year == 1
drop if year == 2000
tabulate year

* 2) Drop Portuguese and Foreign Language (in Phase 1 there is no Portuguese or Foreign Language exams - For Portuguese Phase 1 has an essay)
tabulate subject, summarize(norm_p1score)
drop if inlist(subject, "lang", "port")
tabulate subject, summarize(norm_p1score)
* Remove the dummies and priority interactions of the two dropped subjects
drop Language Portuguese prio_Language prio_Portuguese fem_prio_Language fem_prio_Portuguese

* P1 survivors
tabulate pass_st1

******************************************************************************
***************************** Dep var - P1 score *****************************
******************************************************************************

** Labeling variables
label variable priority "Future priority"
label variable female "Female"
label variable norm_enem_w_g "ENEM"

summarize norm_p1score
* NOTE(review): xtsum needs panel settings and no xtset appears in this script;
* presumably they are stored with the dataset - confirm.
xtsum norm_p1score

* Applicant fixed effects are absorbed and standard errors are clustered by applicant (inscri2)
reghdfe norm_p1score priority fem_priority $subject $subject_fem ///
    $g_pol_enem_sub $g_norm_enem_w_prio, absorb(inscri2) cluster(inscri2)
estimates store p1survivors
estadd ysumm

* Share of women in the estimation sample, saved with the stored results
summarize female if e(sample)
scalar share_fem = r(mean)
eststo p1survivors, addscalars(share_female share_fem)

* "Yes" indicators printed in the footer of the table
foreach flag in sub_fe subgender_fe ind_fe enemsub enemprio_pol4 {
    estadd local `flag' "Yes": p1survivors
}

*******************************************************************************************************************
****** Scores normalized by year and subject: by ENEM quintiles for those that did not pass P1 ********************
*******************************************************************************************************************

** Opening Phase 1 norm_scores dataset
use "Work Data/Gender_Phase1_long.dta", clear

*** Creating variables
* Numeric version of the subject string, plus one 0/1 indicator per subject value
encode subject, gen(sub)
tab subject, gen(d_sub)
label var sub "Subject"

** Subject dummies
* NOTE(review): tabulate numbers the indicators following the sorted order of the
* subject codes; the mapping below assumes that order is Biology, Chemistry,
* Geography, History, Mathematics, Physics, Portuguese - confirm against the data.
rename d_sub1 Biology
rename d_sub2 Chemistry
rename d_sub3 Geography
rename d_sub4 History
rename d_sub5 Mathematics
rename d_sub6 Physics
rename d_sub7 Portuguese
* Labels (full variable names are used so this does not depend on name abbreviation)
label var Biology "Biology"
label var Chemistry "Chemistry"
label var Geography "Geography"
label var History "History"
label var Mathematics "Mathematics"
label var Physics "Physics"
label var Portuguese "Portuguese"

** Interaction: priority X female
gen fem_priority=female*priority
label var fem_priority "Female $\times$ Future Priority"

** Interaction: priority X subject
* For every subject dummy: female x subject, priority x subject and the triple interaction
foreach v of varlist Biology-Portuguese {
    gen fem_`v'=`v'*female
    label var fem_`v' "Female $\times$ `v'"
    gen prio_`v'=priority*`v'
    label var prio_`v' "Future priority $\times$ `v'"
    gen fem_prio_`v'=fem_priority*`v'
    label var fem_prio_`v' "Female $\times$ Future priority $\times$ `v'"
}

* Regressor lists used by the estimation commands; Biology is left out of both lists
global subject "Chemistry Geography History Mathematics Physics"
global subject_fem "fem_Chemistry fem_Geography fem_History fem_Mathematics fem_Physics"

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM
* ENEM score expressed as a deviation from the mean of the same year and gender
bysort year female: egen norm_enem_w_ave_g = mean(norm_enem_w)
generate norm_enem_w_g = norm_enem_w - norm_enem_w_ave_g
bysort year female: summarize norm_enem_w_g
drop norm_enem_w_ave_g

* Interaction: subject X ENEM
* Each subject dummy gets its ENEM interaction, the female version of it,
* and powers 2 to 4 of both.
foreach subj of varlist Biology-Portuguese {
    generate enem_`subj' = `subj'*norm_enem_w_g
    label variable enem_`subj' "ENEM scores $\times$ `subj'"
    generate fem_enem_`subj' = female*norm_enem_w_g*`subj'
    label variable fem_enem_`subj' "Female $\times$ ENEM scores $\times$ `subj'"
    forvalues p = 2/4 {
        generate enem_`subj'_`p' = enem_`subj'^`p'
        generate fem_enem_`subj'_`p' = fem_enem_`subj'^`p'
    }
    summarize enem_`subj'* fem_enem_`subj'*
}

* Wildcard list of the subject-by-ENEM polynomial terms (Biology is not listed)
global g_pol_enem_sub "enem_Chemistry* enem_Geography* enem_History* enem_Mathematics* enem_Physics*"
describe $g_pol_enem_sub

* Priority x relative performance in ENEM:
* linear term first, then powers 2 to 4 of the relative score times priority
generate norm_enem_w_priority_g = norm_enem_w_g*priority
forvalues p = 2/4 {
    generate norm_enem_w_priority_g`p' = norm_enem_w_g^`p'*priority
    summarize norm_enem_w_priority_g`p'
}

global g_norm_enem_w_prio norm_enem_w_priority_g*
describe $g_norm_enem_w_prio

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Only years before the affirmative action took place (year 2000 is dropped as well)
drop if aa_year == 1
drop if year == 2000
tabulate year

* 2) Drop Portuguese and Foreign Language (in Phase 1 there is no Portuguese or Foreign Language exams - For Portuguese Phase 1 has an essay)
drop if inlist(subject, "lang", "port")
* Remove the dummy and priority interactions of the dropped subject
drop Portuguese prio_Portuguese fem_prio_Portuguese

* 3) those that did not pass P1 (tabulated before and after the restriction)
tabulate pass_st1
keep if pass_st1 == 0
tabulate pass_st1

*********************************************************************************
****************   Regressions **************************************************
*********************************************************************************

** Labeling variables
label var priority "Future Priority"
label var female "Female"
label var norm_enem_w_g "ENEM"

***** Generating quintiles for this sample
* Any quintile variable already in the data is discarded and rebuilt on the
* restricted sample; capture keeps the script running when it is not there.
capture drop quintiles
* Creating ENEM quintiles
* Five groups of norm_ranking are formed separately within each year.
* NOTE(review): presumably norm_ranking is the ENEM-based ranking - confirm.
* NOTE(review): egen xtile() is not part of official Stata; presumably it comes from the egenmore package.
sort year (norm_ranking), stable
egen quintiles=xtile(norm_ranking), nq(5) by(year)
tab quintiles, sum(enem)

* One regression per quintile of the sample that did not pass P1, then a single
* LaTeX table that places the five quintile columns next to the p1survivors column.
foreach y in quintile {
    levelsof `y's, local(qlevels)
    foreach q of local qlevels {

        * Same specification as the main estimate, restricted to one quintile
        reghdfe norm_score priority fem_priority $subject $subject_fem ///
            $g_pol_enem_sub $g_norm_enem_w_prio if `y's==`q', ///
            absorb(inscri2) cluster(inscri2)
        estimates store `y'`q'
        estadd ysumm

        * Share of women in the estimation sample, saved with the stored results
        summarize female if e(sample)
        scalar share_fem = r(mean)
        eststo `y'`q', addscalars(share_female share_fem)
    }

    * "Yes" indicators printed in the footer of the table, added to every quintile model
    foreach flag in sub_fe subgender_fe ind_fe enemsub enemprio_pol4 {
        estadd local `flag' "Yes": `y'*
    }

    * Tex
    esttab `y'* p1survivors ///
        using "Output/p_general_result_phase1score_applicantsonlyp1_ENEM_`y'.tex", ///
        se star(* 0.10 ** 0.05 *** 0.01) nogap ///
        stats(ymean ysd share_female sep r2_a N N_clust sep ///
            sub_fe subgender_fe ind_fe enemsub enemprio_pol4, ///
            fmt(%3.2fc %3.2fc %3.2fc %1s %9.3fc %9.0fc %9.0fc %1s %3s %3s %3s %3s) ///
            labels("Mean Dep.Var" "Std. Dep.Var" "Share female" " " ///
                "$\bar{R}^2$" "Number of observations" "Number of applicants" " " ///
                "Subject FE" "Subject-gender FE" "Individual FE" ///
                "ENEM $\times$ Subject FE" "ENEM $\times$ Future Priority")) ///
        b(%7.3f) se(%7.3f) booktabs replace f label collabels(none) ///
        keep(priority fem_priority) ///
        refcat(priority " \\ \multicolumn{6}{l}{\textit{Dependent variable: Phase 1 normalized subject-specific scores}} \\", nolabel) ///
        mtitles("Q1" "Q2" "Q3" "Q4" "Q5" "Main sample") ///
        mgroups("Did not pass P1" "P1 survivors", pattern(1 0 0 0 0 1) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
        nonumbers

}

